library(PoissonBinomial)
library(FNN)
library(LaplacesDemon)

# preliminaries
set.seed(2022)

# control constants
sampleSizes <- c(100, 1000) # precinct sample sizes to consider
dShifts <- -0.2             # proportional miss in predicted vs. actual votes

# list of the six data-generating processes 
dgps <- c('uniform', 'closeToZero', 'closeToOne', 'extremal', 'central', 'bimodal')

############################################
##            helper functions            ##
############################################

# logit swing function
logitSwing <- function(probs, D, lwr = 0, upr = 5e4, eps = 1e-8) {
  
  # compute logit swing
  while(upr - lwr > eps) {
    mid <- (lwr + upr)/2
    
    if(sum(1/(1 + (1-probs)/probs*mid)) < D)
      upr <- mid
    else
      lwr <- mid
  }

  return(1/(1 + (1-probs)/probs*mid))
}

# simulation function
simulate <- function(dist, n, dShift, eps = 1e-8) {

  # generate the data
  if(dist == 'uniform')
    probs <- runif(n)
  else if(dist == 'closeToZero')
    probs <- rbeta(n, 0.1, 3)
  else if(dist == 'closeToOne')
    probs <- rbeta(n, 3, 0.1)
  else if(dist == 'extremal')
    probs <- sample(c(rbeta(n/2, 0.1, 3), rbeta(n/2, 3, 0.1)))
  else if(dist == 'central')
    probs <- rbeta(n, 3, 3)
  else if(dist == 'bimodal')
    probs <- sample(c(rbeta(n/2, 3, 10), rbeta(n/2, 10, 3)))
  
  # get the true outcomes 
  D <- round(sum(probs)*(1 + dShift))
  if(D > n) # error checking
    return(c(KLD = NA, rmse = NA, oneMinusRSq = NA))
  
  # compute the conditional probabilities 
  condProbs <- sapply(1:n, FUN = function(i) {
    probs[i]*dpbinom(D - 1, probs[-i])/dpbinom(D, probs)
  })
  
  # compute the logit swung probabilities 
  ls.probs <- logitSwing(probs, D)
  
  # compute and return summary statistics 
  ls.probs <- ifelse(ls.probs == 1, 1 - 1e-16, ifelse(ls.probs == 0, 1e-16, ls.probs))      # dealing with round-off error for 
  condProbs <- ifelse(condProbs == 1, 1 - 1e-16, ifelse(condProbs == 0, 1e-16, condProbs))  # for computing KLD values 
  
  c(KLD = sum(condProbs * log(condProbs/ls.probs) + (1 - condProbs) * log((1 - condProbs)/(1 - ls.probs))),
    rmse = sqrt(mean((condProbs - ls.probs)^2)),
    oneMinusRSq = sum((condProbs - ls.probs)^2)/sum((condProbs - mean(condProbs))^2),
    cor = cor(condProbs, ls.probs))
}

############################################
##             calling script             ##
############################################

# iterate through every combination of error, data-generating process, and sample size
results <- lapply(dShifts, FUN = function(dShift) {
  res <- lapply(dgps, FUN = function(dgp) {
    t(sapply(sampleSizes, FUN = function(n) {
      
      # run the simulation
      simulate(dgp, n, dShift, eps = 1e-8)
    }))
  })
  do.call(rbind, res)
})

# clean up 
results <- data.frame(do.call(rbind, results))
results <- data.frame(cbind(results, expand.grid(sampleSizes, dgps, dShifts)))
names(results)[5:7] <- c("sampleSize", "dgp", "dShift")
results <- results[, c("sampleSize", "dgp", "dShift", "rmse", "oneMinusRSq", "KLD")]

# print the results
Table1 <- data.frame(results[,-3])
Table1[,3:5] <- signif(Table1[,3:5], 3)
Table1 <- Table1[,c(2, 1, 3, 4, 5)]
print(Table1)
